# Information -------------------------------------------------------------

# The Invincible Gender Gap in Political Ambition
# Richard L. Fox & Jennifer  L. Lawless
# Replication Code
# 2023


# Library -----------------------------------------------------------------

library(dplyr)
library(tidyr)
library(forcats)
library(janitor)
library(labelled)

# The code below locates the project root relative to this script.
# If you are not opening this code from the RStudio project, you will need to
# replace it with the full path to your working directory.
here::i_am("code/1_replication.R")
library(here)

### Uncomment and run below code to install any missing packages
# install.packages(c("dplyr",
#                    "tidyr",
#                    "janitor",
#                    "forcats",
#                    "labelled",
#                    "here"))

# Helper function for gender crosstabs w/ preferred formatting.
#
# Arguments:
#   data  A data frame that contains a `gender` column.
#   var   Unquoted column to tabulate (rows of the crosstab).
#   ...   Further arguments forwarded to janitor::tabyl(), e.g. an extra
#         unquoted column as in gen_xtab(df, Considered, newPID).
#
# Returns the tabyl() result with column percentages formatted to one decimal
# place; NA values are not shown (show_na = FALSE).
gen_xtab <- function(data, var, ...) {
  # `data` is an ordinary argument and is used as-is; only `var` is a column
  # reference that must be embraced with {{ }} to reach tabyl().
  data %>%
    tabyl({{ var }}, gender, show_na = FALSE, ...) %>%
    adorn_percentages("col") %>%
    adorn_pct_formatting(1)
}

# Data --------------------------------------------------------------------

# Replication data exactly as stored in the .Rds file
df_numeric <- here("data", "PS_Fox-Lawless_Data.Rds") %>%
  readRDS()

# Factor copy of the data (every column except birthyr) so that crosstabs
# print readable categories
df <- mutate(df_numeric, across(.cols = -birthyr, .fns = as_factor))

# Lookup table of variable names and their labels, for reference
varlist <- tibble(
  var = names(df),
  label = var_label(df, unlist = TRUE)
)


# Figure 1 ----------------------------------------------------------------

# Gender crosstab on considered running for office, four-professions sample
gen_xtab(filter(df, fourprofessions == "Yes"), Considered)

# Gender crosstab on considered running for office, broader sample
df %>%
  gen_xtab(Considered)


# Figure 2 ----------------------------------------------------------------

# Gender crosstab on "very qualified" to run for office
df %>%
  gen_xtab(QUALIFIED)

# Gender crosstab on recruited by political actor
df %>%
  gen_xtab(Formal)

# Gender crosstab on encouraged by personal source
df %>%
  gen_xtab(Informal)

# Table 1 -----------------------------------------------------------------

# Table 1, column 1: baseline logistic regression of having considered
# running for office on gender plus the baseline controls.
m1 <- glm(
  formula = Considered ~ gender + educ + faminc_new + newefficacy +
    newpolparticipation + Democrat + Republican + black + latino + asian +
    PoliticalInterest + married + child18 + birthyr,
  family = binomial(link = "logit"),
  data = df_numeric
)
summary(m1)

# Baseline logistic regression of having taken a concrete step (anystep),
# using the same right-hand side as the baseline "considered running" model.
m2 <- glm(
  formula = anystep ~ gender + educ + faminc_new + newefficacy +
    newpolparticipation + Democrat + Republican + black + latino + asian +
    PoliticalInterest + married + child18 + birthyr,
  family = binomial(link = "logit"),
  data = df_numeric
)
summary(m2)

# Table 1, column 3: more fully specified logistic regression of having
# considered running for office. Adds self-assessed qualifications and
# formal/informal recruitment to the baseline covariates.
m3 <- glm(
  formula = Considered ~ gender + educ + faminc_new + newefficacy +
    newpolparticipation + Democrat + Republican + black + latino + asian +
    PoliticalInterest + married + child18 + birthyr +
    QUALIFIED + Formal + Informal,
  family = binomial(link = "logit"),
  data = df_numeric
)
summary(m3)

# Text of Article ---------------------------------------------------------

# Gender crosstab on reported taking at least one step
df %>%
  gen_xtab(anystep)

# Gender crosstab on very knowledgeable about public policy
df %>%
  gen_xtab(q37b)

# Gender crosstab on believing policy expertise is essential in a candidate
df %>%
  gen_xtab(q13_b)

# Gender crosstab on handle criticism well
df %>%
  gen_xtab(q37f)

# Frequency of women who think withstanding scrutiny is an important quality
# in a candidate (percentages with counts appended)
df %>%
  gen_xtab(q13_e) %>%
  adorn_ns()

# Frequency of women who think women face more scrutiny and challenges than
# men do
df %>%
  gen_xtab(q16_c) %>%
  adorn_ns()

# Gender crosstab on recruited by an elected official
df %>%
  gen_xtab(receelected)

# Gender crosstab on recruited by party leader
df %>%
  gen_xtab(recparty)

# Frequency of GOP women who think women face more challenges than men when
# running for office
gen_xtab(filter(df, newPID == "Republican"), q16_c) %>%
  adorn_ns()

# Frequency of Democratic women who think women face more challenges than men
gen_xtab(filter(df, newPID == "Democrat"), q16_c) %>%
  adorn_ns()

# Frequency of women who agree that "Someone like me would have a hard time
# running for office."
df %>%
  gen_xtab(q37e) %>%
  adorn_ns()

# Predicted Probabilities -------------------------------------------------

# Quick function for the mode (most frequent value) of dummy variables.
#
# Arguments:
#   x      A vector.
#   na.rm  If TRUE (default), missing values are ignored, so NA is returned
#          only when `x` has no non-missing values. Set to FALSE to count NA
#          as a value like any other.
#
# Returns a length-one vector of the same type as `x`; ties go to the value
# that appears first in `x`.
Mode <- function(x, na.rm = TRUE) {
  if (na.rm) {
    non_missing <- x[!is.na(x)]
    # Nothing left to tabulate: return a typed NA rather than a zero-length
    # result, which could not be recycled into a data frame column.
    if (length(non_missing) == 0) {
      return(x[NA_integer_])
    }
    x <- non_missing
  }
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

# One-row data frame of reference covariate values for the predicted
# probabilities: continuous variables at their means, the remaining
# variables at their modes.
atmeans <- df_numeric %>%
  select(
    gender, educ, faminc_new, newefficacy, newpolparticipation,
    Democrat, Republican, black, latino, asian,
    PoliticalInterest, married, child18, birthyr,
    QUALIFIED, Formal, Informal
  ) %>%
  # Continuous variables: overwrite each column with its mean (NAs ignored)
  mutate(across(
    .cols = c(
      faminc_new, newefficacy, newpolparticipation,
      PoliticalInterest, birthyr, QUALIFIED
    ),
    \(v) mean(v, na.rm = TRUE)
  )) %>%
  # Remaining variables: overwrite each column with its most frequent value
  mutate(across(
    .cols = c(
      educ, gender, Democrat, Republican, black, latino, asian,
      married, child18, Formal, Informal
    ),
    Mode
  )) %>%
  # Every row is now identical, so keep only the first
  slice(1)

# Predicted probability from `model` for the `atmeans` profile (continuous
# variables at their means, dummy variables at their modes), with the
# covariates named in `...` overridden, e.g. pred_prob(m1, gender = 1).
pred_prob <- function(model, ...) {
  predict(model, newdata = mutate(atmeans, ...), type = "response")
}

# Table 1, column 1
# Predicted prob of considering a candidacy for:
pred_prob(m1, gender = 1)              # white women
pred_prob(m1, gender = 0)              # white men
pred_prob(m1, gender = 1, latino = 1)  # Latinas
pred_prob(m1, gender = 0, latino = 1)  # Latinos
pred_prob(m1, gender = 1, black = 1)   # black women
pred_prob(m1, gender = 0, black = 1)   # black men
pred_prob(m1, gender = 1, asian = 1)   # Asian women
pred_prob(m1, gender = 0, asian = 1)   # Asian men

# Table 1, column 3
# Predicted prob of considering a candidacy, by self-assessed qualifications:
# white female potential candidate who doesn't think she is qualified
pred_prob(m3, gender = 1, QUALIFIED = 1)
# white female potential candidate who thinks she's "very qualified"
pred_prob(m3, gender = 1, QUALIFIED = 4)
# white male potential candidate who doesn't think he is qualified
pred_prob(m3, gender = 0, QUALIFIED = 1)
# white male potential candidate who thinks he's "very qualified"
pred_prob(m3, gender = 0, QUALIFIED = 4)

# Table 1, column 3
# Predicted prob of considering a candidacy, by political recruitment:
pred_prob(m3, gender = 1, Formal = 0)              # white woman, no recruitment
pred_prob(m3, gender = 1, Formal = 1)              # white woman, recruited
pred_prob(m3, gender = 1, black = 1, Formal = 0)   # black woman, no recruitment
pred_prob(m3, gender = 1, black = 1, Formal = 1)   # black woman, recruited
pred_prob(m3, gender = 1, latino = 1, Formal = 0)  # Latina, no recruitment
pred_prob(m3, gender = 1, latino = 1, Formal = 1)  # Latina, recruited
pred_prob(m3, gender = 1, asian = 1, Formal = 0)   # Asian woman, no recruitment
pred_prob(m3, gender = 1, asian = 1, Formal = 1)   # Asian woman, recruited


# Figure A1 ---------------------------------------------------------------

# Gender crosstabs for each form of political participation.

# Voted in 2000
# NOTE(review): the year may be a typo (birth years in the data run through
# 1999) -- confirm against the questionnaire.
df %>%
  gen_xtab(q1_1)

# Posted about politics on social media
df %>%
  gen_xtab(q1_6)

# Gave money to a campaign
df %>%
  gen_xtab(q1_4)

# Contacted an elected official
df %>%
  gen_xtab(q2_2)

# Attended a political protest or rally
df %>%
  gen_xtab(q1_3)

# Served on a non-profit board
df %>%
  gen_xtab(q2_3)

# Volunteered for a political campaign
df %>%
  gen_xtab(q2_5)


# Table A1 ----------------------------------------------------------------

# Frequencies for men and women on:

# Race: White, black, latino, Asian, other race
df %>%
  gen_xtab(race)

# Party: Democrat (including leaners), Independent,
# Republican (including leaners)
df %>%
  gen_xtab(newPID)

# Income: less than $70,000; $70,000 - $99,999; $100,000 - $149,000;
# $150,000 and above
df %>%
  gen_xtab(incomequartiles)

# Profession: lawyer, business, educator, other profession
df %>%
  gen_xtab(q31)

# Education: BA, post-graduate degree
df %>%
  gen_xtab(educ)

# Age
df %>%
  gen_xtab(AgeThree)


# Table A2 ----------------------------------------------------------------

# Percent of women and men who have considered running for office, by:

# White
gen_xtab(filter(df, race == "White"), Considered)

# black
gen_xtab(filter(df, race == "Black"), Considered)

# latino
gen_xtab(filter(df, race == "Hispanic"), Considered)

# Asian
gen_xtab(filter(df, race == "Asian"), Considered)

# Party: Democrat (including leaners), Independent,
# Republican (including leaners)
df %>%
  gen_xtab(Considered, newPID)

# Income: less than $70,000; $70,000 - $99,999; $100,000 - $149,000;
# $150,000 and above
df %>%
  gen_xtab(Considered, incomequartiles)

# Age: under 40; 40 - 59; 60 and over
df %>%
  gen_xtab(Considered, AgeThree)

# No children at home / children at home
df %>%
  gen_xtab(Considered, child18)

# Married (or living with partner) / not married
df %>%
  gen_xtab(Considered, married)


# Reconstructing Analysis Variables ---------------------------------------

# Recomputes the derived analysis variables from the raw survey items into a
# copy of the numeric data (`df_numeric2`), documenting how each was built.
# Notes:
#   * Sums such as `concretesteps` are NA whenever any component is NA, and a
#     case_when() with no matching condition also yields NA.
#   * NA_real_ (not NA_integer_) is used alongside the double values 0/1 so
#     every case_when() branch has the same type, which older dplyr versions
#     require.
df_numeric2 <- df_numeric %>% 
  mutate(
    # Four professions subsample:
    fourprofessions = case_when(
      q31 %in% 1:4 ~ 1,
      is.na(q31) ~ NA_real_,
      TRUE ~ 0
    ),
    # PID with leaners coded as partisans
    # (presumably 1 = Democrat, 2 = Independent, 3 = Republican -- confirm
    # against the codebook):
    newPID = case_when(
      pid7 == 4 ~ 2,
      pid7 %in% 5:7 ~ 3,
      pid7 %in% 1:3 ~ 1
    ),
    # Income quartiles:
    incomequartiles = case_when(
      faminc_new %in% 1:7 ~ 1,
      faminc_new %in% 8:9 ~ 2,
      faminc_new %in% 10:11 ~ 3,
      faminc_new %in% 12:16 ~ 4
    ),
    # Age cohort (1 = latest birth years, 3 = earliest):
    AgeThree = case_when(
      birthyr %in% 1936:1961 ~ 3,
      birthyr %in% 1982:1999 ~ 1,
      birthyr %in% 1962:1981 ~ 2
    ),
    # Married:
    married = case_when(
      marstat %in% c(1,6) ~ 1,
      marstat %in% 2:5 ~ 0
    ),
    # Ran for office:
    Ran = case_when(
      q5 == 1 ~ 1,
      is.na(q5) ~ NA_real_,
      TRUE ~ 0
    ),
    # Considered running for office:
    Considered = case_when(
      q5 %in% 1:3 ~ 1,
      is.na(q5) ~ NA_real_,
      TRUE ~ 0
    ),
    # Took a concrete step (count of steps, then any-step indicator):
    concretesteps = 
      q5c_1 + 
      q5c_2 + 
      q5c_3 + 
      q5c_4 +
      q5c_5 +
      q5c_6,
    anystep = case_when(
      concretesteps == 0 ~ 0,
      concretesteps > 0 ~ 1
    ),
    # Political participation score:
    newpolparticipation =
      q1_1 +
      q1_4 +
      q2_5 + 
      q2_2 + 
      q1_6 + 
      q1_3 +
      q2_3,
    # Whether they've been recruited by a formal political actor:
    formalnumber = 
      receelected +
      recparty +
      recactvist,
    Formal = case_when(
      formalnumber == 0 ~ 0,
      formalnumber > 0 ~ 1
    ),
    # Whether they've been recruited by a personal source:
    informalnumber = 
      reccolleague +
      recspouse +
      recfamily +
      recreligious,
    # Both branches must test informalnumber; testing formalnumber in the
    # second branch would leave informally-recruited respondents as NA.
    Informal = case_when(
      informalnumber == 0 ~ 0,
      informalnumber > 0 ~ 1
    )
  )
